The following notebook is a series of figures and tables that compare volumetric water content (VWC) as estimated by Hesham using "representative" bulk density values with VWC values estimated by Bryan Carlson using bulk density values taken in 1999. This report differs from the p10_* report in that it loads the measured/calculated values only.
The term "legacy" refers to the values estimated by Hesham. The term "revised" refers to the values estimated by Carlson.
# Setup
import pandas as pd
import pathlib
import glob
import os
import seaborn as sns
import matplotlib.pyplot as plt
from src.p00_calculate_vwc_from_revised_bulkdensity import *
# Project data folders, all resolved under "<current working directory>/data".
inputDir, workingDir, outputDir = (
    pathlib.Path.cwd() / "data" / folderName
    for folderName in ("input", "working", "output")
)
# Get output file to review
# The matching file names are sorted lexicographically and the last one is used.
# NOTE(review): this presumably selects the most recent export (e.g. a sortable
# date stamp in the file name) - confirm against the code that writes these files.
revisedVwcPattern = "VolumetricWaterContentFromRevisedBd_P2_*.csv"
revisedVwcFiles = sorted(outputDir.glob(revisedVwcPattern))
if not revisedVwcFiles:
    # Fail with an actionable message instead of a bare IndexError from [-1].
    raise FileNotFoundError(
        f"No file matching '{revisedVwcPattern}' found in {outputDir}")
filePath = revisedVwcFiles[-1]
# Load data
revisedVwc = pd.read_csv(filePath)
# Load legacy VWC calculated by Hesham using "representative" bulk density values
# Step 1: aggregate the spring/fall calculation files found under the input folder.
# NOTE(review): the integer list and the name list are presumably the column
# positions to read and the names to give them - confirm against
# getVwcSpringFallCalcAggregate in src.p00_calculate_vwc_from_revised_bulkdensity.
vwcAggregate = getVwcSpringFallCalcAggregate(
    inputDir.joinpath("VwcSpringFallCalc"),
    [0, 13, 14, 15, 16, 17],
    ["ID2", "VWC_1", "VWC_2", "VWC_3", "VWC_4", "VWC_5"],
)

# Step 2: reshape the aggregate; "VolumetricWaterContent" is the value column the
# later merges expect to find in legacyVwc.
legacyVwc = tidyVwcSpringFallCalcAggregate(vwcAggregate, "VolumetricWaterContent")
# Compare VWC values, old/legacy (estimated by Hesham) with new/revised (estimated by Bryan)
# Inner join (the pandas default, spelled out here): only rows whose
# Year/Season/ID2/BottomDepth key exists in both tables are kept, so every
# plotted point has both a legacy and a revised value column.
vwcCompare = pd.merge(
    left=legacyVwc,
    right=revisedVwc[["ID2", "Year", "Season", "BottomDepth", "VolumetricWaterContent"]],
    how="inner",
    on=["Year", "Season", "ID2", "BottomDepth"],
    suffixes=("_legacy", "_revised"),
)

# Legacy (x) against revised (y) with one regression fit per season.
sns.set_style("whitegrid")
sns.lmplot(
    data=vwcCompare,
    x="VolumetricWaterContent_legacy",
    y="VolumetricWaterContent_revised",
    hue="Season",
)
# Right join: every revised row is kept (with its Latitude/Longitude) even when
# there is no legacy row for the same key; the legacy value is then missing.
vwcCompareMap = pd.merge(
    left=legacyVwc,
    right=revisedVwc[
        ["ID2", "Year", "Season", "Latitude", "Longitude", "BottomDepth", "VolumetricWaterContent"]
    ],
    on=["Year", "Season", "ID2", "BottomDepth"],
    how="right",
    suffixes=("_legacy", "_revised"),
)
vwcDiff = legacy - revised
import geopandas

# Attach a point geometry to each row (x = Longitude, y = Latitude).
gdf = geopandas.GeoDataFrame(
    vwcCompareMap,
    geometry=geopandas.points_from_xy(
        x=vwcCompareMap["Longitude"],
        y=vwcCompareMap["Latitude"],
    ),
)

# vwcDiff is legacy minus revised, so positive values mean the legacy estimate is higher.
gdf = gdf.assign(
    vwcDiff=gdf["VolumetricWaterContent_legacy"].sub(gdf["VolumetricWaterContent_revised"])
)

# One (x, y) tuple per row, taken from the geometry's representative point.
gdf["coords"] = [
    geom.representative_point().coords[0] for geom in gdf["geometry"]
]

# Summary statistics for both estimates and their difference.
gdf[["VolumetricWaterContent_legacy", "VolumetricWaterContent_revised", "vwcDiff"]].describe()
# Map of vwcDiff for the rows with BottomDepth equal to 1.0: one facet per
# Year (rows) and Season (columns); marker size and colour both encode vwcDiff.
g = sns.relplot(
    kind="scatter",
    data=gdf.loc[gdf["BottomDepth"] == 1.0],
    x="Longitude",
    y="Latitude",
    row="Year",
    col="Season",
    hue="vwcDiff",
    size="vwcDiff",
)
# Same fixed axis window on every facet.
g.set(xlim=(-117.090, -117.075), ylim=(46.775, 46.785))
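Extreme outliers = points whose vwcDiff falls below Q1 - 3 * IQR or above Q3 + 3 * IQR, where the quartiles are computed separately for each season / year-range slice.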
# Preparation
# Columns removed from the outlier tables so they print compactly. The "_x"/"_y"
# names are the suffixes pandas adds to Latitude/Longitude, which exist both in
# the gdf slice and in revisedVwc when the two are merged below.
dropColsForPrettyPrint = ["Latitude_x", "Longitude_x", "geometry", "coords", "Latitude_y", "Longitude_y", "VolumetricWaterContent"]


def plotVwcDiffByYearAndDepth(vwcSlice, ylim=(46.775, 46.785), xlim=(-117.090, -117.075)):
    """Draw a faceted scatter map of vwcDiff for one slice of gdf.

    Facets are laid out with one column per Year and one row per BottomDepth;
    marker size and colour both encode vwcDiff.

    Args:
        vwcSlice: Rows to plot; must contain Longitude, Latitude, Year,
            BottomDepth and vwcDiff columns.
        ylim: Latitude axis limits applied to every facet.
        xlim: Longitude axis limits applied to every facet.

    Returns:
        The seaborn FacetGrid returned by relplot.
    """
    grid = sns.relplot(
        data=vwcSlice,
        x="Longitude",
        y="Latitude",
        col="Year",
        row="BottomDepth",
        kind="scatter",
        size="vwcDiff",
        hue="vwcDiff")
    grid.set(ylim=ylim, xlim=xlim)
    return grid


def listExtremeOutliers(vwcSlice, revised, iqrMultiplier=3):
    """Return the rows of a slice whose vwcDiff is an extreme outlier.

    A row is an extreme outlier when its vwcDiff is below
    Q1 - iqrMultiplier * IQR or above Q3 + iqrMultiplier * IQR, with the
    quartiles computed on vwcSlice itself.

    Args:
        vwcSlice: Slice of gdf to inspect; must contain a vwcDiff column and
            the Year/Season/ID2/BottomDepth key columns.
        revised: Table left-joined onto the outlier rows on those key columns
            (the notebook passes revisedVwc) to show its other columns.
        iqrMultiplier: Number of IQRs beyond the quartiles that marks an
            outlier.

    Returns:
        The outlier rows, merged with ``revised``, without the columns listed
        in dropColsForPrettyPrint, sorted by ID2, Year, Season, BottomDepth.
    """
    q1, q3 = vwcSlice["vwcDiff"].quantile([0.25, 0.75])
    iqr = q3 - q1
    lowOutlier = q1 - (iqr * iqrMultiplier)
    highOutlier = q3 + (iqr * iqrMultiplier)
    isOutlier = (vwcSlice["vwcDiff"] < lowOutlier) | (vwcSlice["vwcDiff"] > highOutlier)
    return (vwcSlice[isOutlier]
        .merge(revised, how="left", on=["Year", "Season", "ID2", "BottomDepth"])
        .drop(columns=dropColsForPrettyPrint)
        .sort_values(by=["ID2", "Year", "Season", "BottomDepth"]))


# Each section below slices gdf by season and year range, maps the slice, then
# evaluates the outlier table as a bare expression (shown by the notebook when
# it is the last statement of a cell).

# Spring, years before 2003
sliceEarlyYearsSpring = gdf[(gdf["Season"] == "Spring") & (gdf["Year"] < 2003)]
g = plotVwcDiffByYearAndDepth(sliceEarlyYearsSpring)
listExtremeOutliers(sliceEarlyYearsSpring, revisedVwc)

# Spring, years after 2002
sliceLateYearsSpring = gdf[(gdf["Season"] == "Spring") & (gdf["Year"] > 2002)]
g = plotVwcDiffByYearAndDepth(sliceLateYearsSpring)
listExtremeOutliers(sliceLateYearsSpring, revisedVwc)

# Fall, years before 2003
sliceEarlyYearsFall = gdf[(gdf["Season"] == "Fall") & (gdf["Year"] < 2003)]
g = plotVwcDiffByYearAndDepth(sliceEarlyYearsFall)
listExtremeOutliers(sliceEarlyYearsFall, revisedVwc)

# Fall, years after 2002
sliceLateYearsFall = gdf[(gdf["Season"] == "Fall") & (gdf["Year"] > 2002)]
g = plotVwcDiffByYearAndDepth(sliceLateYearsFall)
listExtremeOutliers(sliceLateYearsFall, revisedVwc)